import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd # needed for reading CSV
import seaborn as sns #trying a new library to plot
import plotly.express as px
import random
import bqplot
import traitlets
import ipywidgets
from ipywidgets import interact
# Load the Quarterly Census of Employment and Wages extract from a local CSV.
df3 = pd.read_csv(
    "/Users/anuragsneh/Downloads/P1-quarterly-census-of-employment-and-wages-annual-data-beginning-2000.csv"
)

# Strip leading/trailing whitespace from every column label so that the
# column lookups further down ("Area Type", "Annual Average Salary", ...)
# use clean names.
df3 = df3.rename(columns=str.strip)

# Bare expression: only produces output when it is the last line of a
# notebook cell; it has no effect when run as a plain script.
df3.columns

# Column-wise sums for each "Area Type" group.
# NOTE(review): `f` is not referenced again in the visible code.
f = df3.groupby("Area Type").sum()
# Only look at county-level rows.  `.copy()` makes `df_county` an independent
# frame, so adding columns below does not raise pandas'
# SettingWithCopyWarning (the original assigned onto a filtered slice).
df_county = df3[df3["Area Type"] == "County"].copy()

# Broadcast each county's highest and lowest "Annual Average Salary" onto
# every row of that county; `transform` returns a result aligned with the
# original rows.  The aggregations are given as the string aliases
# "max"/"min" rather than `np.max`/`np.min`, which recent pandas versions
# flag with a FutureWarning; the computed values are the same.
_salary_by_area = df_county.groupby("Area")["Annual Average Salary"]
df_county["Max Annual Average Salary"] = _salary_by_area.transform("max")
df_county["Minimum Annual Average Salary"] = _salary_by_area.transform("min")

# Every county row now carries its county's maximum and minimum annual
# average salary next to its own "Annual Average Salary".
# Interactive Plotly scatter: each county row's annual average salary
# (log-scaled x axis) against the maximum annual average salary of its
# county (y axis).  Hovering a point shows its "Area" as the heading and its
# "NAICS Title" as extra data.
figure_1 = px.scatter(
    df_county,
    x="Annual Average Salary",
    y="Max Annual Average Salary",
    log_x=True,
    hover_name="Area",
    hover_data=["NAICS Title"],
)

# Chart title, axis titles and a large monospace font for all text.
figure_1.update_layout(
    title="Average Annual salaries in a county vs Max Average Annual salaries in a county",
    xaxis_title="Annual Average Salary",
    yaxis_title="Max Annual Average Salary",
    font={"family": "Courier New, monospace", "size": 20},
)

figure_1.show()
# Keep only the rows whose "Annual Average Salary" equals the maximum
# recorded for their county.
_is_county_maximum = df_county["Annual Average Salary"].eq(
    df_county["Max Annual Average Salary"]
)
df_county_max = df_county.loc[_is_county_maximum]

# Bare expressions: presumably notebook cell outputs; they have no effect
# when the file is run as a plain script.
df_county_max
df_county_max.dtypes
from bqplot import *
from IPython.display import display

# Interactive bqplot dashboard over `df_county_max`.
# Field_1 chooses the column drawn on the linear x axis; Field_2 chooses the
# column drawn on the ordinal y axis.
dropdown_1 = ipywidgets.Dropdown(
    # NOTE(review): every column is offered here, including text columns such
    # as "NAICS Title", which a LinearScale cannot position -- consider
    # restricting the options to numeric columns.
    options=list(df_county_max.columns),
    description='Field_1',
    value='Average Employment',
    disabled=False,
)
dropdown_2 = ipywidgets.Dropdown(
    options=["NAICS Title", "NAICS"],
    description='Field_2',
    value='NAICS Title',
    disabled=False,
)

# Initial data comes from the dropdowns' default selections.
x = df_county_max[dropdown_1.value]
y = df_county_max[dropdown_2.value]

x_sc = bqplot.LinearScale()
y_sc = bqplot.OrdinalScale()

# Label each axis with the currently selected field instead of a fixed
# 'x' / 'y', so the chart always states what is plotted.
x_ax = bqplot.Axis(scale=x_sc, label=dropdown_1.value)
y_ax = bqplot.Axis(scale=y_sc, label=dropdown_2.value, orientation='vertical')

scatter = bqplot.Scatter(x=x, y=y, scales={'x': x_sc, 'y': y_sc})
fig = bqplot.Figure(axes=[x_ax, y_ax], marks=[scatter])


def dropdown_1_eventhandler(change):
    """Redraw the x data when Field_1 changes.

    Args:
        change: The ipywidgets change notification; ``change['new']`` is the
            newly selected column name.
    """
    selected_column = change['new']
    scatter.x = df_county_max[selected_column]
    x_ax.label = selected_column  # keep the axis label in sync


def dropdown_2_eventhandler(change):
    """Redraw the y data when Field_2 changes.

    Args:
        change: The ipywidgets change notification; ``change['new']`` is the
            newly selected column name.
    """
    selected_column = change['new']
    scatter.y = df_county_max[selected_column]
    y_ax.label = selected_column  # keep the axis label in sync


# Only react to changes of the dropdowns' `value` trait.
dropdown_1.observe(dropdown_1_eventhandler, names="value")
dropdown_2.observe(dropdown_2_eventhandler, names="value")

# Stack both dropdowns above the figure.
display(ipywidgets.VBox([dropdown_1, dropdown_2, fig]))


EMPLOYMENT TRENDS IN NEW YORK STATE
In the Max Annual Average Salary vs. Annual Average Salary visualisation, we have introduced an interactive element that conveys a holistic picture of a whole county in a single hover. On the X axis we have plotted the annual average salary, while on the Y axis we have plotted the maximum annual average salary in that same county. Moreover, when you hover over a data point you can also see the profession that the data point represents. As the user hovers over the dots, information such as the county, the highest annual average salary of the profession, and the average annual salary is displayed. The user also gets a high-level indication of the dispersion and variance in the annual average salary. We can easily detect outliers just by hovering over those specific points. This could serve as a yardstick for the government to reassess the disparity between occupations.
In the above interactive dashboard, I have designed the visual so that we can ascertain the prime macro employment figures: highest annual average salary, minimum annual average salary, and annual average salary. The user has the option of selecting the indicator they wish, broken down by either the profession's title or the government-designated profession code, for all the prevalent professions in a particular county. For a profession to qualify as prevalent, it needs to have the highest annual average salary in its county. Every dot represents a county that exhibits the quantitative indicator chosen by the user. The user simply has to click the drop-down menus of Field 1 and Field 2.
In Field 1, we can see the drop-down menu of all the macroeconomic indicators: highest annual average salary, minimum annual average salary, annual average salary, and business establishments.
In Field 2, we can opt to observe the above-mentioned aggregate values either by the profession's title or by the profession's code.
If the user selects the highest average annual salary and the profession title, the user can directly see that the mining industry is the highest paying, as its data point is at the extreme right.
Contextual dataset: https://catalog.data.gov/dataset/quarterly-census-of-employment-and-wages-annual-data-beginning-2000 The contextual dataset shows employment information from 2000 until 2019. Our dataset showcases the information from 2016. The contextual dataset can be used to understand the employment pattern much more closely and to ascertain how it changes as a function of time.
Further, the post-recession private employment growth charts for the USA and NY showcase the rise in employment growth in Upstate New York, Downstate New York and the rest of the USA. These charts work in excellent conjunction with our two interactive dashboards. Although the charts we built convey a reasonable peek into the employment condition of New York, it is only with the context of employment trends across the whole of the USA that we are able to understand exactly how good or bad the situation is. Ever since 2010, we can witness a steady uptick in the private employment growth rate. Upstate NY seems to have an expanded economy that is capable of accommodating a higher growth rate comparable to Downstate NY and the USA as a whole.
Chart 1 helps us identify the wage changes across the 5 counties in NYC. A negative wage growth rate can be taken as a clarion call for the unions to start negotiating wages with employers. Chart 2, the employment rate change in the 5 counties of New York State along with the dotted USA average lines, helps in understanding how responsive the local, county-level NY economy is in comparison to the USA average growth rate in the Q1 phase of 2014-2015. The growth rate in the 5 counties of NYC is crucial in assessing the employment statistics of New York State, as NYC is the most important area in terms of GDP contribution. The USA average line should act as a milestone that ought to be reached by every mayor.